Integrating labels from the Cui dataset

Onto the in vitro CM models dataset.

## Storing results ##
#referenceset_location <- "../scrna/publicdata/cui2019/R_analysis/20200609_results_atrial-ventricle_UMAP/seuset_HVG+reddim_PC1-15_res0.5.rds"

## DATASET LOCATION:
referenceset_location <- "../scrna/publicdata/asp_cui_integration/R_analysis/20210820_OtherVisualizations_Woodshall/seusetcombinedv3_PC1-20_res1.rds"
dataset_location <-  "../scrna/CMdiff_mon-EB-EHT_combined/rerun_v4/clust_umap_R/20211026_GOterms_coarseReclust_DE-TFs/seusetv3_PC1-50_res1.5_coarse.rds"
# The folder you want to save the R_results folder in, the folders will be made if the path does not exist yet.
workdir <- "../scrna/CMdiff_mon-EB-EHT_combined/rerun_v4/clust_umap_R/"
# if regression is performed: this will already be included in the folder name
result_descript = "_IntegrationPrediction_CuiAsp"

selected_markers2 = c("GATA4","NKX2-5","ISL1","PDGFRA","RYR2","TNNT2","MYL7","PECAM1","MYL2","IRX4","COL3A1","MKI67")

PC_set = 20
w_umappatch = 12
h_umappatch = 12
library(Seurat)
## Attaching SeuratObject
library(patchwork)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(pheatmap)

Setting up results directory

dateoftoday <- gsub("-", "", as.character(Sys.Date()))
resultsdir <- paste(workdir, "/", dateoftoday, result_descript, sep = "")
system(paste("mkdir -p ", resultsdir))
knitr::opts_knit$set(root.dir = normalizePath(resultsdir))

Results will be stored in: resultsdir

seuset.list <- list()
seuset.list[["reference"]] <- readRDS(referenceset_location)
seuset.list[["CMset"]] <- readRDS(dataset_location)
seuset.query <- seuset.list[["CMset"]]
seuset.reference <- seuset.list[["reference"]]
#seuset.features <- SelectIntegrationFeatures(object.list = seuset.list, reference = "reference", nfeatures = 3000)
DimPlot(seuset.reference, group.by = "seurat_clusters")

DimPlot(seuset.reference, group.by = "cluster_annotation")

DimPlot(seuset.reference, group.by = "timepoint")

pdf(paste0("Featureplot_PC1-", PC_set, "_SelectedMarkers2.pdf"), width = 20, height = 15)
FeaturePlot(seuset.reference, selected_markers2)
dev.off()
## png 
##   2

Starting integration:

DefaultAssay(seuset.reference) <- "integrated"
SetIdent(seuset.reference, value = "cluster_annotation")
## An object of class Seurat 
## 26734 features across 7730 samples within 2 assays 
## Active assay: integrated (4000 features, 4000 variable features)
##  1 other assay present: sf
##  2 dimensional reductions calculated: pca, umap
DefaultAssay(seuset.query) <- "integrated"
SetIdent(seuset.query, value = "coarse_seurat_clusters")
## An object of class Seurat 
## 43598 features across 12706 samples within 3 assays 
## Active assay: integrated (2000 features, 2000 variable features)
##  2 other assays present: sf, uf
##  2 dimensional reductions calculated: pca, umap
head(VariableFeatures(seuset.reference), n = 10)
##  [1] "ITLN1"  "SPP1"   "CCL3"   "C1QB"   "CXCL14" "CD74"   "PENK"   "NPPB"  
##  [9] "C1QA"   "SFRP2"
seuset.anchors <- FindTransferAnchors(reference = seuset.reference, 
                                      query = seuset.query, 
                                      reference.assay = "integrated", 
                                      query.assay = "integrated", 
                                      dims = 1:PC_set)
## Performing PCA on the provided reference using 1315 features as input.
## Projecting cell embeddings
## Finding neighborhoods
## Finding anchors
##  Found 2262 anchors
## Filtering anchors
##  Retained 762 anchors
predictions <- TransferData(anchorset = seuset.anchors, refdata = seuset.reference$cluster_annotation, dims = 1:PC_set)
## Finding integration vectors
## Finding integration vector weights
## Predicting cell labels
write.csv(predictions, "Cui-Asp_onto_inVitro_prediction_scores.csv", quote = FALSE)

predictions_num <- predictions[,!colnames(predictions) %in% c("predicted.id")]
predictions_num$cluster <- seuset.query@meta.data$coarse_seurat_clusters
#predictions_num$cluster <- seuset.query@meta.data[rownames(predictions_num),]$coarse_seurat_clusters
predictions_num <- predictions_num %>%
  group_by(cluster) %>%
  summarise(across(everything(), mean))

predictions_num <- as.data.frame(predictions_num)
rownames(predictions_num) <- predictions_num$cluster
predictions_num <- predictions_num[,-1]
predictions_num <- predictions_num[,!colnames(predictions_num) %in% c("cluster", "prediction.score.max")]
predictions_mat <- as.matrix(predictions_num)

dotchart(predictions_mat,
         cex = 0.6)

pdf("PredictionScores_Heatmap.pdf", useDingbats = FALSE, height = 15, width = 7)
heatmap(predictions_mat, margins = c(15,5))
dev.off()
## png 
##   2
write.csv(predictions_mat, "predictions_matrix_meanpercluster.csv")
# Generate heatmap with pheatmap
#predictions_mat <- read.csv("predictions_matrix_meanpercluster.csv", header = TRUE, row.names = 1)

colnames(predictions_mat) <- gsub("prediction.score.", "", colnames(predictions_mat))

pdf("PredictionScores_coarse-clust_pheatmap.pdf", useDingbats = FALSE, height = 10, width = 7)
print(pheatmap(predictions_mat, cluster_rows = FALSE, cluster_cols = TRUE))
dev.off()
## pdf 
##   3
pdf("PredictionScores_coarse-clust_pheatmap_rowclust.pdf", useDingbats = FALSE, height = 8, width = 5)
print(pheatmap(predictions_mat, cluster_rows = TRUE, cluster_cols = TRUE))
dev.off()
## pdf 
##   3
seuset <- seuset.query
seuset$predicted.id <- predictions$predicted.id
DimPlot(seuset, group.by = "predicted.id")

unique(seuset$predicted.id)
##  [1] "Endothelium / Pericytes / Adventitia"          
##  [2] "Epicardium-derived cells"                      
##  [3] "Late atrial CM"                                
##  [4] "Epicardial cells"                              
##  [5] "Fibroblast-like (smaller vascular development)"
##  [6] "Dividing fibroblasts"                          
##  [7] "Early mixed CM"                                
##  [8] "Fibroblast-like (cardiac skeleton)"            
##  [9] "Erythrocytes"                                  
## [10] "Early ventricular CM"                          
## [11] "Immune cells?"                                 
## [12] "Late ventricular CM"                           
## [13] "Fibroblast-like (larger vascular development)" 
## [14] "Mixed CM / Other"                              
## [15] "W5 atrial CM"                                  
## [16] "Dividing endothelium"                          
## [17] "Late capillary endothelium"                    
## [18] "Early capillary endothelium"                   
## [19] "Smooth muscle / Fibroblast"                    
## [20] "Immune cells 2"
pdf(paste0("UMAPs_PC1-", PC_set, "_PredictedId.pdf"), width = 13, height = 10)
DimPlot(seuset, reduction = "umap", group.by = "predicted.id", label = TRUE, combine = TRUE, pt.size = 1.5)
dev.off()
## png 
##   2
pl.list <- list()
for (i in c("predicted.id","Method","Timepoint","Lineage", "seurat_clusters","Phase")){
  print(i)
  pl.list[[i]] <- DimPlot(seuset, reduction = "umap", group.by = i, combine = TRUE)
  print(pl.list[[i]])
}
## [1] "predicted.id"

## [1] "Method"

## [1] "Timepoint"

## [1] "Lineage"

## [1] "seurat_clusters"

## [1] "Phase"

pdf(paste0("IntegratedData_PredictedCui_UMAPs_PC1-", PC_set, ".pdf"), width = w_umappatch, height = h_umappatch)
Reduce( `+`, pl.list ) +
      patchwork::plot_layout( ncol = 2 )
dev.off()
## png 
##   2
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /vol/mbconda/snabel/anaconda3/envs/kb_scrna_R_mon3/lib/libopenblasp-r0.3.17.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] pheatmap_1.0.12    tidyr_1.1.3        dplyr_1.0.7        patchwork_1.1.1   
## [5] SeuratObject_4.0.2 Seurat_4.0.4      
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15            colorspace_2.0-2      deldir_0.2-10        
##   [4] ellipsis_0.3.2        ggridges_0.5.3        spatstat.data_2.1-0  
##   [7] farver_2.1.0          leiden_0.3.9          listenv_0.8.0        
##  [10] ggrepel_0.9.1         fansi_0.5.0           codetools_0.2-18     
##  [13] splines_4.1.0         knitr_1.33            polyclip_1.10-0      
##  [16] jsonlite_1.7.2        ica_1.0-2             cluster_2.1.2        
##  [19] png_0.1-7             uwot_0.1.10           shiny_1.6.0          
##  [22] sctransform_0.3.2     spatstat.sparse_2.0-0 compiler_4.1.0       
##  [25] httr_1.4.2            assertthat_0.2.1      Matrix_1.3-4         
##  [28] fastmap_1.1.0         lazyeval_0.2.2        later_1.3.0          
##  [31] htmltools_0.5.2       tools_4.1.0           igraph_1.2.6         
##  [34] gtable_0.3.0          glue_1.4.2            RANN_2.6.1           
##  [37] reshape2_1.4.4        Rcpp_1.0.7            scattermore_0.7      
##  [40] jquerylib_0.1.4       vctrs_0.3.8           nlme_3.1-152         
##  [43] lmtest_0.9-38         xfun_0.25             stringr_1.4.0        
##  [46] globals_0.14.0        mime_0.11             miniUI_0.1.1.1       
##  [49] lifecycle_1.0.0       irlba_2.3.3           goftest_1.2-2        
##  [52] future_1.22.1         MASS_7.3-54           zoo_1.8-9            
##  [55] scales_1.1.1          spatstat.core_2.3-0   promises_1.2.0.1     
##  [58] spatstat.utils_2.2-0  parallel_4.1.0        RColorBrewer_1.1-2   
##  [61] yaml_2.2.1            reticulate_1.20       pbapply_1.4-3        
##  [64] gridExtra_2.3         ggplot2_3.3.5         sass_0.4.0           
##  [67] rpart_4.1-15          stringi_1.7.4         highr_0.9            
##  [70] rlang_0.4.11          pkgconfig_2.0.3       matrixStats_0.60.1   
##  [73] evaluate_0.14         lattice_0.20-44       ROCR_1.0-11          
##  [76] purrr_0.3.4           tensor_1.5            labeling_0.4.2       
##  [79] htmlwidgets_1.5.3     cowplot_1.1.1         tidyselect_1.1.1     
##  [82] parallelly_1.27.0     RcppAnnoy_0.0.19      plyr_1.8.6           
##  [85] magrittr_2.0.1        R6_2.5.1              generics_0.1.0       
##  [88] DBI_1.1.1             pillar_1.6.2          mgcv_1.8-36          
##  [91] fitdistrplus_1.1-5    survival_3.2-13       abind_1.4-5          
##  [94] tibble_3.1.4          future.apply_1.8.1    crayon_1.4.1         
##  [97] KernSmooth_2.23-20    utf8_1.2.2            spatstat.geom_2.2-2  
## [100] plotly_4.9.4.1        rmarkdown_2.10        grid_4.1.0           
## [103] data.table_1.14.0     digest_0.6.27         xtable_1.8-4         
## [106] httpuv_1.6.2          munsell_0.5.0         viridisLite_0.4.0    
## [109] bslib_0.3.0